##### SETUP #####

rm(list=ls())

pacman::p_load(
  emmeans,
  forcats,
  foreign,
  ggplot2,
  ggpubr,
  gridExtra,
  kableExtra,
  knitr,
  magrittr,
  tidyverse,
  psych,
  purrr,
  readxl,
  reshape2,
  rstatix,
  stargazer,
  stats,
  texreg
)


##### SURVEY 1 DATA #####

df1.raw <- read_excel(here::here("Copy of Bomb2_April 2, 2021_12.35 Numeric Max.xlsx"), skip=1) %>%
  select(1:9) %>%
  mutate(across(1:4, ~replace(., .=="2", 0))) %>%
  set_colnames(c("control", "time", "modest", "combined", "age", "gender",
                 "party", "useful", "long"))

df1 <- df1.raw %>%
  pivot_longer(1:4,
               cols = c("control", "time", "modest", "combined"),
               names_to = "treatment",
               values_to = "support",
               values_drop_na=TRUE) %>%
  mutate(oppose = abs(support - 1)) %>% # generate oppose variable
  mutate(age2 = recode(age,
                       "1" = "18 to 29",
                       "2" = "30 to 44",
                       "3" = "45 to 60",
                       "4" = "60+")) %>%
  mutate(gender2 = recode(gender,
                          "1" = "Male",
                          "2" = "Female",
                          "3" = "Other")) %>%
  mutate(party2 = recode(party,
                                 "10" = "Independent",
                                 "11" = "Democrat",
                                 "12" = "Republican",
                                 "13" = "Other")) %>%
  mutate(useful2 = ifelse(useful <= 2, "High", 
                          ifelse(useful == 5, "Mixed", "Low")),
         .after=useful)


fit1 <- lm(data=df1,
           support ~ treatment - 1)

         

##### 1 EXHIBIT 1 #####
# A visual with confidence intervals for Survey 1, comparing the total answers for the control, modest, lengthy, and mixed treatments (but without breaking it into the separate demographics). That should yield a relatively uncluttered  visual, I hope.

fit1.se <- sqrt(diag(vcov(fit1)))
viz1 <- as.data.frame(cbind(fit1$coefficients, fit1.se))
colnames(viz1) <- c("coef", "se")

viz1$ci.low <- viz1$coef - (1.96 * viz1$se)
viz1$ci.high <- viz1$coef + (1.96 * viz1$se)

viz1 <- viz1 %>%
  mutate(treatment = as.factor(c("Combined", "Control", "Modest", "Time"))) %>%
  mutate(treatment = fct_relevel(treatment, 
                                 "Control",
                                 "Modest",
                                 "Time",
                                 "Combined"))

# viz1$treatment <- ordered(c("Control", "Modest", "Time", "Combined"),
#                            levels = c("Control", "Modest", "Time", "Combined"))

plot1 <- ggplot(data = viz1,
                aes(x=treatment, y=coef)) +
  geom_bar(stat="identity", width=0.75, fill="gray60") +
  geom_errorbar (aes(ymin=ci.low, ymax=ci.high),
                 width=0.3,
                 size = 0.6, 
                 color = "grey3") +
  theme_minimal() +
  theme(axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        panel.grid.major.x = element_blank())

plot1

ggsave("plot1.png", plot1)

##### 2 EXHIBIT 2 #####
# A second visual with confidence intervals for Survey 1, comparing only the control treatment to the lengthy treatment across all demographics.

treat2 <- c("Control", "Time")
gender2 <- as.character(c("Male", "Female", "Other"))
age2 <- c("30 to 44", "18 to 29", "45 to 60", "60+")
party2 <- c("Republican", "Democrat", "Independent", "Other")
useful2 <- c("Low", "Mixed", "High")

## A. GENDER ----

fit2.gender <- lm(data=df1,
            support ~ treatment * gender2,
            subset = treatment %in% c("control", "time"))
summary(fit2.gender)

newdata.gender <- model.frame(fit2.gender) %>%
  select(-"support") %>%
  distinct() %>%
  arrange(treatment, gender2)

pred2.gender <- 
  cbind(newdata.gender, predict(fit2.gender, 
                                newdata=newdata.gender, 
                                se.fit=TRUE)) %>%
  filter(gender2 != "Other") %>%
  mutate(ci.low = fit - (1.96 * se.fit)) %>%
  mutate(ci.high = fit + (1.96 * se.fit))

plot1.gender <- ggplot(data=pred2.gender,
                       aes(x=gender2, y=fit, fill=treatment)) +
  geom_bar(position="dodge", stat="identity") +
  geom_errorbar(aes(ymin=ci.low, ymax=ci.high), 
                position=position_dodge(width=0.9), 
                width=0.4,
                alpha=3, 
                size=0.5,
                color = "grey3") +
  theme_minimal() +
  theme(axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        panel.grid.major.x = element_blank()) +
  scale_fill_grey(start=0.5, end=0.85,
                  name="Treatment",
                  labels=c("Control", "Time")) +
  ylim(-0.05, 1.05) +
  ggtitle("Gender") +
  theme(legend.title=element_blank(),
        plot.title = element_text(size = 12, face = "bold"))

plot1.gender

## B. AGE -----

fit2.age <- lm(data=df1,
                  support ~ treatment * age2,
                  subset = treatment %in% c("control", "time"))
summary(fit2.age)

newdata.age <- model.frame(fit2.age) %>%
  select(-"support") %>%
  distinct() %>%
  arrange(treatment, age2)

pred2.age <- 
  cbind(newdata.age, predict(fit2.age, 
                                newdata=newdata.age, 
                                se.fit=TRUE)) %>%
  filter(age2 != "Other") %>%
  mutate(ci.low = fit - (1.96 * se.fit)) %>%
  mutate(ci.high = fit + (1.96 * se.fit))

plot1.age <- ggplot(data=pred2.age,
                       aes(x=age2, y=fit, fill=treatment)) +
  geom_bar(position="dodge", stat="identity") +
  geom_errorbar(aes(ymin=ci.low, ymax=ci.high), 
                position=position_dodge(width=0.9), 
                width=0.4,
                alpha=3, 
                size=0.5,
                color = "grey3") +
  theme_minimal() +
  theme(axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        panel.grid.major.x = element_blank()) +
  scale_fill_grey(start=0.5, end=0.85,
                  name="Treatment",
                  labels=c("Control", "Time")) +
  ylim(-0.05, 1.05) +
  ggtitle("Age") +
  theme(legend.title=element_blank(),
        plot.title = element_text(size = 12, face = "bold"))

plot1.age


## C. PARTY -----

fit2.party <- lm(data=df1,
               support ~ treatment * party2,
               subset = treatment %in% c("control", "time"))
summary(fit2.party)

newdata.party <- model.frame(fit2.party) %>%
  select(-"support") %>%
  distinct() %>%
  arrange(treatment, party2)

pred2.party <- 
  cbind(newdata.party, predict(fit2.party, 
                             newdata=newdata.party, 
                             se.fit=TRUE)) %>%
  filter(party2 != "Other") %>%
  mutate(ci.low = fit - (1.96 * se.fit)) %>%
  mutate(ci.high = fit + (1.96 * se.fit))

plot1.party <- ggplot(data=pred2.party,
                    aes(x=party2, y=fit, fill=treatment)) +
  geom_bar(position="dodge", stat="identity") +
  geom_errorbar(aes(ymin=ci.low, ymax=ci.high), 
                position=position_dodge(width=0.9), 
                width=0.4,
                alpha=3, 
                size=0.5,
                color = "grey3") +
  theme_minimal() +
  theme(axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        panel.grid.major.x = element_blank()) +
  scale_fill_grey(start=0.5, end=0.85,
                  name="Treatment",
                  labels=c("Control", "Time")) +
  ylim(-0.05, 1.05) +
  ggtitle("Party") +
  theme(legend.title=element_blank(),
        plot.title = element_text(size = 12, face = "bold"))

plot1.party

## D. UTILITY OF TORTURE -----

fit2.useful <- lm(data=df1,
                 support ~ treatment * useful2,
                 subset = treatment %in% c("control", "time"))
summary(fit2.useful)

newdata.useful <- model.frame(fit2.useful) %>%
  select(-"support") %>%
  distinct() %>%
  arrange(treatment, useful2)

newdata.useful  

pred2.useful <- 
  cbind(newdata.useful, predict(fit2.useful, 
                               newdata=newdata.useful, 
                               se.fit=TRUE)) %>%
  filter(useful2 != "Other") %>%
  mutate(ci.low = fit - (1.96 * se.fit)) %>%
  mutate(ci.high = fit + (1.96 * se.fit))

plot1.useful <- ggplot(data=pred2.useful,
                      aes(x=reorder(useful2, -fit), 
                          y=fit, fill=treatment)) +
  geom_bar(position="dodge", stat="identity") +
  geom_errorbar(aes(ymin=ci.low, ymax=ci.high), 
                position=position_dodge(width=0.9), 
                width=0.4,
                alpha=3, 
                size=0.5,
                color = "grey3") +
  theme_minimal() +
  theme(axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        panel.grid.major.x = element_blank()) +
  scale_fill_grey(start=0.5, end=0.85,
                  name="Treatment",
                  labels=c("Control", "Time")) +
  ylim(-0.05, 1.05) +
  ggtitle("Belief in Utility of Torture") +
  theme(legend.title=element_blank(),
        plot.title = element_text(size = 12, face = "bold"))

plot1.useful

###### OUTPUT #####
plot2.overall <- ggarrange(plot1.gender, plot1.age, plot1.party, plot1.useful,
          legend="bottom",
          common.legend = TRUE)

ggsave("plot2_overall.png", plot2.overall, device="png")

##### 3 MULTIVARIATE ANALYSIS ####
# A multivariate analysis of the data from Survey 1.  It'll be cluttered and ugly so I'll put it in an appendix.

### LINEAR PROBABILITY MODEL

fit3.a <- lm(data=df1, 
             support ~ treatment -1)


fit3.b <- lm(data=df1, 
           support ~
             treatment + gender2 +
             treatment + age2 +
             treatment + party2 +
             treatment + useful2 -1)

out3 <- stargazer(fit3.a, 
                  fit3.b,
                  type="html",
                  style="ajps",
                  out = "out3.html",
                  order = c(2:4,1,5:9,12,10:11,12:14),
                  covariate.labels = c("Control",
                                       "Modest",
                                       "Time",
                                       "Combined",
                                       "Male",
                                       "Other",
                                       "30 to 44",
                                       "45 to 60",
                                       "60+",
                                       "Republican",
                                       "Independent",
                                       "Other",
                                       "Low confidence",
                                       
                                       "Mixed confidence"),
                  # column.labels = c("Treatments Only", 
                  #                   "With Covariates"),
                  dep.var.labels.include=FALSE,
                  align=TRUE,
                  model.numbers = TRUE,
                  omit.stat = c("f", "ser"))


###### ANOVA of differences #####

anova.a <- pairs(emmeans(fit3.a, ~treatment))

anova.a <- anova.a %>%
  kable(col.names = c("Pair",
                     "Estimate",
                     "SE",
                     "Df",
                     "T-ratio",
                     "P-value"),
        format="html",
       digits = 3)

save_kable(anova.a, file="anova_lpm.html", format="html")


anova.b <- pairs(emmeans(fit3.b, ~treatment))
anova.b

## LOGIT

fit3.c <- glm(data=df1, 
             support ~ treatment -1,
             family="binomial")


fit3.d <- glm(data=df1, 
           support ~
             treatment + gender2 +
             treatment + age2 +
             treatment + party2 +
             treatment + useful2 -1,
           family="binomial")

out3b <- stargazer(fit3.c, fit3.d,
                  type="html",
                  title="Logit Model for Survey 1",
                  style="ajps",
                  out = "out3b.html",
                  order = c(2:4,1,5:9,12,10:11,12:14),
                  covariate.labels = c("Control",
                                       "Modest",
                                       "Time",
                                       "Combined",
                                       "Male",
                                       "Other",
                                       "30 to 44",
                                       "45 to 60",
                                       "60+",
                                       "Republican",
                                       "Independent",
                                       "Other",
                                       "Low confidence",
                                       
                                       "Mixed confidence"),
                  # column.labels = c("Treatments Only", 
                  #                   "With Covariates"),
                  dep.var.labels.include=FALSE,
                  align=TRUE,
                  model.numbers = TRUE,
                  omit.stat = c("f", "ser", "aic"))

anova.c <- pairs(emmeans(fit3.c, ~treatment))
anova.c

anova.d <- pairs(emmeans(fit3.d, ~treatment))
anova.d

# 4 EXHIBIT 4 -----
# 4.  A third and final visual with confidence intervals for Survey 2, comparing Q1' to Q3' (but without breaking it into the separate demographics).

##### SURVEY 2 DATA #####

df2 <- read_excel("Copy of Bomb3_April 2, 2021_12.36 Numeric.xlsx",
                  col_names=TRUE,
                  col_types = "numeric") %>%
  slice(-1) %>%
  mutate(Q1 = ifelse(Q1==2,0,1)) %>%
  mutate(Q22 = ifelse(Q22==2,0,1))


df2$support1 <- df2$Q1
df2$support2 <- ifelse(df2$Q1==0, 0,
                      ifelse(df2$Q22==0, 0,1))

prop.table(table(df2$Q1))
prop.table(table(df2$Q22))
prop.table(table(df2$oppose2))

viz2 <-  data.frame(tail(describe(df2),2)) %>%
  mutate(names = as.factor(c("Initial Support (Q1')", 
                             "Support for Lengthy Interrogation (Q3')"))) %>%
  mutate(names = fct_relevel(names, "Initial Support (Q1')", 
                             "Support for Lengthy Interrogation (Q3')")) %>%
  mutate(ci.low = mean - (1.96 * se)) %>%
  mutate(ci.high = mean + (1.96 * se))

plot4 <- ggplot(data=viz2, 
       aes(x=names, y = mean)) +
  geom_bar(width = 0.5, stat="identity") +
  geom_errorbar(aes(ymin=ci.low, ymax=ci.high), 
                position=position_dodge(width=0.9), 
                width=0.4,
                alpha=3, 
                size=0.5,
                color = "grey3") +
  theme_minimal() +
  theme(axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        panel.grid.major.x = element_blank()) +
  ylim(-0.05, 1) +
  theme(legend.title=element_blank(),
        plot.title = element_text(size = 12, face = "bold"))

ggsave("plot4.png", plot4, device="png")
